#!/usr/bin/env bash
# Script which generates txt2vid videos.
#
# Params (pass from the terminal):
#   argument 1: name of content, e.g. PT
#   argument 2: resemble user name -- check resemble_config.json
#
# Example command:
#   bash get_txt2vid_data.sh PT Pulkit
# Run with bash (not by sourcing): the script changes directories internally,
# so it must execute in its own process.
#
# Works as:
#   Step 1: Convert driving video to appropriate format, resolution and fps;
#           encode at ~ orig quality (crf).
#   Step 2: Generate appropriate audio files -- as is, resemble, google.
#   Step 3: Run wav2lip to generate final videos.
#
# Note:
#   For generating videos with audio as is, ensure it is already in the
#   appropriate folder by running convert_audios_AAC.sh first.

# params (empty when omitted -- downstream paths will then not resolve)
content_name=${1}
resemble_user_name=${2}

# Specify codec: H.264 at near-original quality; audio bitrate tag for lookup.
video_codec_name="AVC"
video_codec="libx264"
crf=20
audio_br=10

#video_name="originals/videos/${content_name}_original.mov"
# Driving audio
#audio_name="originals/audios/${content_name}_original_audio_AAC_br1k.m4a"
# Resolve the encoded driving-audio file with a glob instead of parsing `ls`
# output (which word-splits on spaces/newlines). If several files match, the
# first match (lexicographic) is used; if none match, the variable stays
# empty -- same observable result as the old failing `ls`.
candidates=( originals/wav2lip_inputs/"${content_name}_audio_sr16000_br${audio_br}"* )
if [[ -e "${candidates[0]}" ]]; then
  audio_name_encoded=${candidates[0]}
else
  audio_name_encoded=""
fi
printf '%s\n' "${audio_name_encoded}"
text_name="originals/texts/${content_name}_original_transcript.txt"

#wavlip_driving_video="originals/wav2lip_inputs/${content_name}_driving_video.mov"
# Pick the driving-video container: YL and MS exist as .mp4, all other
# content names as QuickTime .mov.
case "${content_name}" in
  YL|MS)
    wav2lip_driving_video="originals/wav2lip_inputs/${content_name}_driving_video.mp4"
    ;;
  *)
    wav2lip_driving_video="originals/wav2lip_inputs/${content_name}_driving_video.mov"
    ;;
esac

wav2lip_input_path="originals/wav2lip_inputs/"
#output_path="prelim_expt/txt2vid_set/${content_name}/"
output_path="main_expt/txt2vid_set/${content_name}/"

# Ensure both working directories exist (no-op when already present).
mkdir -p "${wav2lip_input_path}" "${output_path}"

## Step 1: Encode driving video in appropriate format
# 25 fps, 720p, yuv420p pixel format, video only (-an).
# NB: the ffmpeg scale *filter* separates width/height with ':' (1280:720);
# the 'WxH' form belongs to the -s option, not inside -vf.
wav2lip_driving_video_encoded="originals/wav2lip_inputs/${content_name}_driving_video_${video_codec_name}_crf${crf}.mp4"
ffmpeg -y -i "${wav2lip_driving_video}" -c:v "${video_codec}" -crf "${crf}" \
  -vf "fps=25,scale=1280:720,format=yuv420p" \
  -an "${wav2lip_driving_video_encoded}"

###########################################################################
## (old) Step 1: clip original video to get frame and first 5s content
## requires avc video as input (codec in mov files generated by quicktime not supported by container)
#tmp_video_file="${wav2lip_input_path}${content_name}_full_input_video_AVC.mp4"
#wav2lip_driving_frame="${wav2lip_input_path}${content_name}_driving_frame.png"
#wav2lip_driving_video="${wav2lip_input_path}${content_name}_driving_video.mp4"
#echo "Get driving video for wav2lip"
#
## get avc video input at high quality -- generate if not already exists
#if [ -f "${tmp_video_file}" ]
#then
#  echo "AVC file for getting wav2lip exists."
#else
#  ffmpeg -y -i "${video_name}" -c:v libx264 -crf 10 -vf "fps=25,scale=1280x720,format=yuv420p" -an "${tmp_video_file}"
#fi
## get first frame
#if [ -f "${wav2lip_driving_frame}" ]
#then
#  echo "Driving frame file for getting wav2lip exists."
#else
#  ffmpeg -y -ss 1 -i "${tmp_video_file}" -vf "select=eq(n\,0)" -vframes 1 "${wav2lip_driving_frame}"
#  wait
#fi
## get first 5 secs
#to_clip=5
#if [ -f "${wav2lip_driving_video}" ]
#then
#  echo "Driving video file for getting wav2lip exists."
#else
#  ffmpeg -y -i "${tmp_video_file}" -t "${to_clip}" -c copy "${wav2lip_driving_video}"
#  wait
#fi
#
#############################################################################################

## Step 2: generate resemble and google audios from text script
echo 'generating audio data from text script and using google and resemble.'

# Run each TTS tool from its own directory inside a subshell: a failed `cd`
# then aborts only that tool instead of silently running python from the
# wrong directory, and the caller's cwd is restored automatically (no
# fragile `cd -` round-trip needed). Output paths are assigned in the parent
# shell so Step 3 can still read them.
google_audio_name="../dataset/${wav2lip_input_path}${content_name}_google_audio.m4a"
(
  cd ../google_stt_tts || exit 1
  python tts.py "../dataset/${text_name}" "${google_audio_name}"
)

resemble_audio_name="../dataset/${wav2lip_input_path}${content_name}_resemble_audio.m4a"
(
  cd ../resemble_tts || exit 1
  python tts_file.py -i "../dataset/${text_name}" -o "${resemble_audio_name}" -u "${resemble_user_name}"
)


## Step 3: generate wav2lip content from selected audio content, google and resemble file and both video and frame driving content
## change directory to wav2lip for running code

#wav2lip_driving_video="originals/wav2lip_inputs/PT_driving_video_recorded_AVC_crf30.mp4"
cd ../Wav2Lip

# original audio
#output_file_frame="${output_path}original_audio_driving_frame.mp4"
#output_file_video="${output_path}original_audio_driving_video_AAC_br1k_bitrateA_4009.mp4"
output_file_video="${output_path}${content_name}_driving_video_${video_codec_name}_crf${crf}_original_audio_br${audio_br}.mp4"
echo "generate wav2lip with original content"

### ensure running in right conda environment
## conda activate txt2vid
#python inference_streaming.py --checkpoint_path checkpoints/wav2lip_gan.pth \
#        --face "../dataset/${wav2lip_driving_frame}" \
#        --audio "../dataset/${audio_name}" \
#        --wav2lip_batch_size 1 \
#        --outfile "../dataset/${output_file_frame}"

python inference_streaming.py --checkpoint_path checkpoints/wav2lip_gan.pth \
        --face "../dataset/${wav2lip_driving_video_encoded}" \
        --audio "../dataset/${audio_name_encoded}" \
        --wav2lip_batch_size 1 \
        --outfile "../dataset/${output_file_video}"

## google audio
##output_file_frame="${output_path}google_audio_driving_frame.mp4"
##output_file_video="${output_path}google_audio_driving_video.mp4"
#output_file_video="${output_path}${content_name}_driving_video_${video_codec_name}_crf${crf}_google_audio.mp4"
#echo "generate wav2lip with google audio content"
#
##python inference_streaming.py --checkpoint_path checkpoints/wav2lip_gan.pth \
##        --face "../dataset/${wav2lip_driving_frame}" \
##        --audio "../dataset/${google_audio_name}" \
##        --wav2lip_batch_size 1 \
##        --outfile "../dataset/${output_file_frame}"
#
#python inference_streaming.py --checkpoint_path checkpoints/wav2lip_gan.pth \
#        --face "../dataset/${wav2lip_driving_video_encoded}" \
#        --audio "../dataset/${google_audio_name}" \
#        --wav2lip_batch_size 1 \
#        --outfile "../dataset/${output_file_video}"

# resemble audio: lip-sync the encoded driving video to the resemble TTS track
#output_file_frame="${output_path}resemble_audio_driving_frame.mp4"
#output_file_video="${output_path}resemble_audio_driving_video.mp4"
echo "generate wav2lip with resemble content"

resemble_outfile="${output_path}${content_name}_driving_video_${video_codec_name}_crf${crf}_resemble_audio.mp4"
output_file_video=${resemble_outfile}

#python inference_streaming.py --checkpoint_path checkpoints/wav2lip_gan.pth \
#        --face "../dataset/${wav2lip_driving_frame}" \
#        --audio "../dataset/${resemble_audio_name}" \
#        --wav2lip_batch_size 1 \
#        --outfile "../dataset/${output_file_frame}"

python inference_streaming.py \
        --checkpoint_path checkpoints/wav2lip_gan.pth \
        --face "../dataset/${wav2lip_driving_video_encoded}" \
        --audio "../dataset/${resemble_audio_name}" \
        --wav2lip_batch_size 1 \
        --outfile "../dataset/${output_file_video}"
# return to the dataset directory (prints the target dir, as `cd -` does)
cd -